"""Export information about GitLab."""
from collections import defaultdict
import os
import typing
from urllib import parse

from cki_lib import gitlab
from cki_lib import misc
from cki_lib import yaml
from cki_lib.cronjob import CronJob
from cki_lib.logger import get_logger
import prometheus_client

# Optional YAML/JSON configuration taken from the environment; keys used below
# include 'namespaces', 'projects', 'jobs', 'webhooks' and 'osp'.
GITLAB_CONFIG = yaml.load(contents=os.environ.get('GITLAB_CONFIG', '{}'))
# Namespace subscription plans considered valid; anything else maps to 'unknown'.
KNOWN_PLANS = ['free', 'premium', 'ultimate', 'opensource']
# Webhook alert statuses considered valid; anything else maps to 'unknown'.
KNOWN_ALERT_STATUSES = ['executable', 'disabled', 'temporarily_disabled']
# Pipeline statuses considered valid; anything else maps to 'unknown'.
KNOWN_PIPELINE_STATUSES = ['created', 'waiting_for_resource', 'preparing', 'pending',
                           'running', 'success', 'failed', 'canceled', 'skipped', 'manual',
                           'scheduled']
# Project import/mirror statuses; 'none' is used locally for non-mirrored projects.
KNOWN_IMPORT_STATUSES = ['none', 'scheduled', 'started', 'finished', 'failed', 'canceled']
LOGGER = get_logger(__name__)


class GitLabMetricsHourly(CronJob):
    """Calculate GitLab metrics.

    Exports namespace plan/size, project size, CI/CD schedule status,
    project mirroring status and Open Source Program metrics for the
    namespaces configured in GITLAB_CONFIG.
    """

    schedule = '0 * * * *'  # once per hour

    metric_namespace_plan = prometheus_client.Enum(
        'cki_gitlab_namespace_plan',
        'Paid plan for a namespace',
        ['instance', 'namespace'],
        states=KNOWN_PLANS + ['unknown'],
    )

    metric_namespace_end_date = prometheus_client.Gauge(
        'cki_gitlab_namespace_end_date',
        'timestamp of the end date of the current plan for a namespace',
        ['instance', 'namespace'],
    )

    metric_namespace_size = prometheus_client.Gauge(
        'cki_gitlab_namespace_size',
        'Size statistics for a namespace',
        ['instance', 'namespace', 'statistic'],
    )

    metric_project_size = prometheus_client.Gauge(
        'cki_gitlab_project_size',
        'Size statistics for a project',
        ['instance', 'project', 'archived', 'statistic'],
    )

    metric_schedule_status = prometheus_client.Enum(
        'cki_gitlab_schedule_status',
        'Status of CI/CD schedules',
        ['instance', 'project', 'archived', 'active', 'description', 'cron'],
        states=KNOWN_PIPELINE_STATUSES + ['unknown'],
    )

    metric_import_status = prometheus_client.Enum(
        'cki_gitlab_import_status',
        'Status of project mirroring',
        ['instance', 'project', 'archived'],
        states=KNOWN_IMPORT_STATUSES + ['unknown'],
    )

    metric_osp_project_visibility = prometheus_client.Enum(
        'cki_gitlab_osp_project_visibility',
        'Project visibility as relevant to the GitLab Open Source Program',
        ['instance', 'project', 'archived'],
        states=['public', 'internal', 'private'],
    )

    metric_osp_license_type = prometheus_client.Enum(
        'cki_gitlab_osp_license_type',
        'License type as relevant to the GitLab Open Source Program',
        ['instance', 'project', 'archived', 'license'],
        states=['floss', 'other', 'none'],
    )

    # GraphQL query for the storage statistics of a namespace and all its
    # projects (including subgroups); paged via namespace/projects.
    size_query = '''
      query storage(
        $fullPath: ID!
        $after: String = ""
      ) {
        namespace(fullPath: $fullPath) {
          fullPath
          rootStorageStatistics {
            buildArtifactsSize
            containerRegistrySize
            lfsObjectsSize
            packagesSize
            pipelineArtifactsSize
            repositorySize
            snippetsSize
            storageSize
            uploadsSize
            wikiSize
          }
          projects(
            includeSubgroups: true
            after: $after
          ) {
            nodes {
              fullPath
              archived
              statistics {
                buildArtifactsSize
                containerRegistrySize
                lfsObjectsSize
                packagesSize
                pipelineArtifactsSize
                repositorySize
                snippetsSize
                storageSize
                uploadsSize
                wikiSize
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
      }
    '''

    # GraphQL query for the pipeline schedules of all projects of a namespace
    # (including subgroups); paged via namespace/projects.
    schedules_query = '''
      query schedules(
        $fullPath: ID!
        $after: String = ""
      ) {
        namespace(fullPath: $fullPath) {
          projects(
            includeSubgroups: true
            after: $after
          ) {
            nodes {
              fullPath
              archived
              pipelineSchedules {
                nodes {
                  active
                  cron
                  description
                  lastPipeline {
                    status
                  }
                }
              }
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
      }
    '''

    def update_metric_namespace(self) -> None:
        """Update the namespace plan and plan end date metrics."""
        for namespace_url in GITLAB_CONFIG.get('namespaces', []):
            url_parts = parse.urlsplit(namespace_url)
            gl_instance = gitlab.get_instance(namespace_url)
            # url_parts.path[1:] strips the leading '/' to get the full path
            gl_namespace = gl_instance.namespaces.get(url_parts.path[1:])

            plan = (gl_namespace.plan
                    if gl_namespace.attributes.get('plan') in KNOWN_PLANS else 'unknown')
            # pylint: disable=no-member
            self.metric_namespace_plan.labels(gl_instance.url, gl_namespace.full_path).state(plan)

            # end_date is only present for paid plans; skip the metric otherwise
            if end_date := gl_namespace.attributes.get('end_date'):
                self.metric_namespace_end_date.labels(
                    gl_instance.url, gl_namespace.full_path,
                ).set(misc.datetime_fromisoformat_tz_utc(end_date).timestamp())

    def update_metric_namespace_size(self) -> None:
        """Update the namespace and project size metrics."""
        for namespace_url in GITLAB_CONFIG.get('namespaces', []):
            url_parts = parse.urlsplit(namespace_url)
            gl_client = gitlab.get_graphql_client(namespace_url)
            result = gl_client.query(
                self.size_query,
                variable_values={'fullPath': url_parts.path[1:]},
                paged_key='namespace/projects',
                operation_name='storage',
            )
            # rootStorageStatistics can be null in the GraphQL response
            for key, value in (result['namespace']['rootStorageStatistics'] or {}).items():
                self.metric_namespace_size.labels(
                    gl_client.url, result['namespace']['fullPath'], key).set(value)
            for project in result['namespace']['projects']['nodes']:
                # per-project statistics can be null as well (nullable in the
                # GraphQL schema, e.g. insufficient permissions); guard like
                # rootStorageStatistics above instead of crashing the whole run
                for key, value in (project['statistics'] or {}).items():
                    self.metric_project_size.labels(
                        gl_client.url, project['fullPath'], misc.booltostr(project['archived']), key
                    ).set(value)

    def update_metric_schedule_status(self) -> None:
        """Update the schedule status metric."""
        for namespace_url in GITLAB_CONFIG.get('namespaces', []):
            url_parts = parse.urlsplit(namespace_url)
            gl_client = gitlab.get_graphql_client(namespace_url)
            result = gl_client.query(
                self.schedules_query,
                variable_values={'fullPath': url_parts.path[1:]},
                paged_key='namespace/projects',
                operation_name='schedules',
            )
            for project in misc.get_nested_key(result, 'namespace/projects/nodes', []):
                # no paging so only the first page of schedules is checked
                for schedule in misc.get_nested_key(project, 'pipelineSchedules/nodes', []):
                    # lastPipeline is null for schedules that never ran;
                    # GraphQL returns upper-case status values, Enum wants lower
                    status = (schedule['lastPipeline'] or {}).get('status', '').lower()
                    self.metric_schedule_status.labels(
                        gl_client.url, project['fullPath'],
                        misc.booltostr(project['archived']),
                        misc.booltostr(schedule['active']),
                        schedule['description'],
                        schedule['cron'],
                    ).state(
                        status if status in KNOWN_PIPELINE_STATUSES else 'unknown'
                    )

    def update_metric_import_status(self) -> None:
        """Update the import status metric for mirrored projects."""
        for namespace_url in GITLAB_CONFIG.get('namespaces', []):
            url_parts = parse.urlsplit(namespace_url)
            gl_instance = gitlab.get_instance(namespace_url)
            for gl_project in gl_instance.groups.get(url_parts.path[1:], lazy=True).projects.list(
                    iterator=True, include_subgroups=True, with_shared=False):
                # non-mirrored projects get the locally-defined 'none' state
                status = gl_project.import_status if gl_project.mirror else 'none'
                self.metric_import_status.labels(
                    gl_instance.url, gl_project.path_with_namespace,
                    misc.booltostr(gl_project.archived),
                ).state(
                    status if status in KNOWN_IMPORT_STATUSES else 'unknown'
                )

    def update_metric_osp(self) -> None:
        """Update the GitLab Open Source Program (visibility/license) metrics."""
        for namespace_url in GITLAB_CONFIG.get('osp', []):
            url_parts = parse.urlsplit(namespace_url)
            gl_instance = gitlab.get_instance(namespace_url)
            for gl_project in gl_instance.groups.get(url_parts.path[1:], lazy=True).projects.list(
                    iterator=True, include_subgroups=True, with_shared=False):
                # project visibility
                self.metric_osp_project_visibility.labels(
                    gl_instance.url, gl_project.path_with_namespace,
                    misc.booltostr(gl_project.archived),
                ).state(gl_project.visibility)
                # license detection mirrors the upstream OSP check:
                # https://gitlab.com/gitlab-org/os-license-checker/-/blob/main/license-checker.sh
                license_data = gl_instance.projects.get(gl_project.id, license=True).license
                key = misc.get_nested_key(license_data, 'key') or 'none'
                # anything detected that is not 'other'/'none' counts as FLOSS
                license_type = key if key in {'other', 'none'} else 'floss'
                self.metric_osp_license_type.labels(
                    gl_instance.url, gl_project.path_with_namespace,
                    misc.booltostr(gl_project.archived),
                    key,
                ).state(license_type)

    def run(self, **_: typing.Any) -> None:
        """Update all hourly metrics."""
        self.update_metric_namespace()
        self.update_metric_namespace_size()
        self.update_metric_schedule_status()
        self.update_metric_import_status()
        self.update_metric_osp()


class GitLabMetricsMinutely(CronJob):
    """Calculate GitLab metrics.

    Exports pending job counts, job queueing times and webhook alert
    statuses for the projects configured in GITLAB_CONFIG.
    """

    schedule = '*/2 * * * *'  # every 2 minutes

    metric_jobs_pending = prometheus_client.Gauge(
        'cki_gitlab_jobs_pending', 'Number of jobs in pending state',
        ['name', 'stage'],
    )

    metric_jobs_queued_time = prometheus_client.Histogram(
        'cki_gitlab_jobs_queued_time', 'How long jobs are queued before starting',
        ['name', 'stage'],
        # bucket boundaries in seconds: 1m, 5m, 10m, 30m, 1h, 10h
        buckets=[60 * m for m in [1, 5, 10, 30, 60, 600]],
    )

    metric_webhook_alert_status = prometheus_client.Enum(
        'cki_gitlab_webhook_alert_status',
        'Alert status of webhooks',
        ['instance', 'path', 'url'],
        states=KNOWN_ALERT_STATUSES + ['unknown'],
    )

    def __init__(self) -> None:
        """Calculate GitLab metrics."""
        super().__init__()
        # (job name, stage) -> number of currently pending jobs
        self.pending_jobs_count: typing.DefaultDict[typing.Tuple[str, str], int] = (
            defaultdict(int))
        # export zeros for known jobs where pending=0
        for stage, jobs in GITLAB_CONFIG.get('jobs', {}).items():
            for job in jobs:
                self.pending_jobs_count[(job, stage)] = 0

    def update_metric_pending_jobs(self) -> None:
        """Update the pending jobs metrics."""
        # reset so jobs that are no longer pending drop back to zero
        for key in self.pending_jobs_count:
            self.pending_jobs_count[key] = 0
        for project_url in GITLAB_CONFIG.get('projects', []):
            _, gl_project = gitlab.parse_gitlab_url(project_url)
            for job in gl_project.jobs.list(scope='pending', iterator=True):
                self.pending_jobs_count[(job.name, job.stage)] += 1
                # queued_duration can be null in the API response (e.g. for
                # freshly-created jobs); only observe actual values
                if job.queued_duration is not None:
                    self.metric_jobs_queued_time.labels(
                        name=job.name, stage=job.stage).observe(job.queued_duration)

        for (name, stage), counter in self.pending_jobs_count.items():
            self.metric_jobs_pending.labels(name=name,
                                            stage=stage).set(counter)

    def update_metric_webhook_alert_status(self) -> None:
        """Update the webhook alert status metric."""
        groups = set()
        projects = set(GITLAB_CONFIG.get('webhooks', []))
        # derive all ancestor group URLs of the configured project URLs so
        # group-level hooks are checked as well
        for project in projects:
            url_parts = parse.urlsplit(project)
            project_parts = url_parts.path[1:].split('/')
            groups.update(
                parse.urlunsplit(url_parts._replace(path='/'.join(project_parts[:i])))
                for i in range(1, len(project_parts))
            )
        self._update_metric_webhook_alert_status('groups', groups)
        self._update_metric_webhook_alert_status('projects', projects)

    def _update_metric_webhook_alert_status(
            self, what: str, urls: typing.Iterable[str]) -> None:
        """Update the webhook alert status metric for the given URLs.

        what is the python-gitlab manager attribute name, i.e. 'groups' or
        'projects'.
        """
        for url in urls:
            url_parts = parse.urlsplit(url)
            gl_instance = gitlab.get_instance(url)
            gl_object = getattr(gl_instance, what).get(url_parts.path[1:], lazy=True)
            try:
                gl_hooks = gl_object.hooks.list(all=True)
            except Exception:  # pylint: disable=broad-except
                # best effort: hook listing can fail e.g. for derived group
                # URLs without sufficient permissions; log and move on
                LOGGER.warning('Unable to list hooks of %s for %s', what, url)
                gl_hooks = []
            for gl_hook in gl_hooks:
                alert_status = gl_hook.attributes.get('alert_status', 'unknown')
                # pylint: disable=no-member
                self.metric_webhook_alert_status.labels(
                    gl_instance.url, url_parts.path[1:], gl_hook.url,
                ).state(
                    alert_status if alert_status in KNOWN_ALERT_STATUSES else 'unknown'
                )

    def run(self, **_: typing.Any) -> None:
        """Update all minutely metrics."""
        self.update_metric_pending_jobs()
        self.update_metric_webhook_alert_status()
